require(plyr)
require(dplyr)
require(tidyr)
require(tidyverse)
require(lmtest)
require(stargazer)
require(sandwich)
require(haven)
require(splines)
require(broom)
require(data.table)
require(dotwhisker)
require(gdata)
require(readxl)
require(stm)
require(xtable)
require(foreach)

# Set working directory
setwd("...")

#### IMPORT DATA ####

# Core data
d = fread("./data/tropeData.csv")
d$geo_region = factor(d$geo_region, 
                      levels = c("Western Europe", 
                                 "Americas",
                                 "Eastern Europe", 
                                 "Middle East/Northern Africa", 
                                 "Sub-Saharan Africa", 
                                 "Asia",  
                                 "Oceania"))
d$geo_region2 = factor(d$geo_region2, 
                       levels = c("Western Europe", 
                                  "Americas",
                                  "Eastern Europe", 
                                  "Middle East/Northern Africa", 
                                  "Sub-Saharan Africa", 
                                  "Asia",  
                                  "Oceania",
                                  "Vietnam", "USSR", "South Africa"))

# Predictions and topic data from STM
load("./data/predictionsAndTopics.RData")

# Hand-coded topic labels for STM
topicLabels = read_xlsx("./data/stmLabels.xlsx") 


#### APPENDIX 2: MEASURING TROPES ####

#### APPENDIX 2.1: Method 1: Trope Dictionaries ####

#### Table A3: Dictionary of terms for infantilization ####
names(synCounts$INF)

#### Table A4: Dictionary of terms for animal analogies ####
names(synCounts$ANIM)

#### Table A5: Dictionary of terms for belligerence ####
names(synCounts$BELLIG)

#### Table A6: Dictionary of terms for irrationality ####
names(synCounts$IRR)


#### APPENDIX 2.2: Method 2: Supervised Learning ####

#### Table A8: Total number of positive hand-coded examples for each trope ####

tset |> summarize(across(INF:IRR, sum, na.rm=T))


#### Table A10: Performance and descriptive statistics for highest-quality predictions ####

# See code for producing Table 2 in modelMetrics.R


#### Table A11: Kappa ####

# See code in modelMetrics.R


#### Table A12: F1 ####

# See code in modelMetrics.R


#### Table A13: AUC ####

# See code in modelMetrics.R


#### Table A14: Accuracy ####

# See code in modelMetrics.R


#### Table A15: Confusion matrices for best performing models ####

# See code in getPredictedData.R


#### Table A16: Examples of false positives from best performing supervised learning models ####

# Combine hand-coded entries with main data
tsetsub = tset |> dplyr::select(entryID, INF:IRR)
ddt = merge(d, tsetsub, by="entryID")
ddt = ddt |> distinct(entryID, .keep_all = T)

# Note that the code below presents multiple examples; Table A16 includes just one from each
# Infantilization
ddt |> filter(ddt$predINF > 0.75 & ddt$INF == 0) |> dplyr::select(date, tt)

# Animal analogy
ddt |> filter(ddt$predANIM > 0.9 & ddt$ANIM == 0) |> dplyr::select(date, tt)

# Belligerence
ddt |> filter(ddt$predBELLIG > 0.82 & ddt$BELLIG == 0) |> dplyr::select(date, tt)

# Irrationality
ddt |> filter(ddt$predIRR > 0.6 & ddt$IRR == 0) |> dplyr::select(date, tt)


#### Table A17: Examples of false negatives from best performing supervised learning models ####
# Note that this code presents multiple examples; Table A16 includes just one from each

# Infantilization
ddt |> filter(ddt$predINF < 0.1 & ddt$INF == 1) |> dplyr::select(date, tt)

# Animal analogy
ddt |> filter(ddt$predANIM < 0.15 & ddt$ANIM == 1) |> dplyr::select(date, tt)

# Belligerence
ddt |> filter(ddt$predBELLIG < 0.15 & ddt$BELLIG == 1) |> dplyr::select(date, tt)

# Irrationality
ddt |> filter(ddt$predIRR < 0.025 & ddt$IRR == 1) |> dplyr::select(date, tt)


#### APPENDIX 3: QUANTITATIVE DESCRIPTIVE STATISTICS ####

#### Table A18: Descriptive statistics for continuous variables ####
desc = d %>% dplyr::select(predINF, predANIM, predBELLIG, predIRR, 
                           countINF, countANIM, countBELLIG, countIRR,
                           logYSI, conf_high, personal, leaderTenure, UStrade, USmilaid, logWords)
summary(desc)

#### Table A19: Descriptive statistics for binary variables ####
table(d$globalsouth)
table(d$democracy)
table(d$leaderMention)
table(d$USdefense)
table(d$geo_region)


#### Table A20: Correlation matrix of outcome variables ####
d |> dplyr::select(matches("pred")) |> 
  cor(use="complete.obs") |> round(3)


#### Table A21: Correlation matrix of explanatory and control variables ####
d |> dplyr::select(globalsouth, logYSI, conf_high, democracy, personal, leaderMention,
                   UStrade, USmilaid, USdefense, logWords) |>
  cor(use="complete.obs") |> round(3)


#### Figure A3: Loess curves of trope prevalence over time ####
d |>
  mutate(pcountINF = countINF/nWords,
         pcountANIM = countANIM/nWords,
         pcountBELLIG = countBELLIG/nWords,
         pcountIRR = countIRR/nWords) |>
  dplyr::select(date, globalsouth, geo_region, 
                predINF, predANIM, predBELLIG, predIRR,
                pcountINF, pcountANIM, pcountBELLIG, pcountIRR) |>
  pivot_longer(starts_with(c("pred", "pcount")), names_to="trope", values_to="value") |>
  mutate(tropeName=ifelse(trope %in% c("predINF", "pcountINF"), "Infantilization",
                          ifelse(trope %in% c("predANIM", "pcountANIM"), "Animal analogy", 
                                 ifelse(trope %in% c("predBELLIG", "pcountBELLIG"), "Belligerence", "Irrationality")))) |>
  mutate(tropeName = factor(tropeName, levels=c("Infantilization", "Animal analogy", "Belligerence", "Irrationality"))) |>
  mutate(modelType = ifelse(grepl("pred", trope), "Probabilities", "Counts")) |>
  ggplot(aes(date, value)) + 
  geom_smooth(aes(color=tropeName), method="loess", span=0.2, se=F, linewidth=0.6) +
  theme_bw() + 
  ylab("Measure") +
  scale_x_date("Year", minor_breaks = "1 year") +
  scale_color_discrete("Trope") +
  facet_wrap(vars(fct_rev(modelType)), nrow=1, ncol=2, dir="h", scales="free_y") +
  geom_hline(yintercept = 0, linetype=2) +
  theme(legend.position = "bottom")

ggsave("./figures/tropesOverTimeAll.pdf", width=7, height=3.5)


#### APPENDIX 4: FULL RESULTS AND ROBUSTNESS CHECKS ####

#### APPENDIX 4.1: Full results ####

#### Table A22: Results of regressions ####

# Define variables and labels
controls1 = "+ conf_high + 
                   democracy + 
                   personal + 
                   leaderMention + 
                   UStrade + 
                   USmilaid + 
                   USdefense + 
                   logWords + factor(country)"
varLabels1 = c("Global South", "Years since independence", 
               "Conflict", 
               "Democracy", 
               "Personalism", 
               "Leader mention", 
               "US trade", 
               "US military aid", 
               "US defense", 
               "Entry length")

theIV = "~ globalsouth + logYSI"
predControls = controls1
countControls = controls1
varLabels = varLabels1
dataset="d"
showPreds = TRUE

# Run models
source("./code/doRegressions.R")
modelsPlain = outList

# Note: This code is identical to the code to produce Table 4 in the main text; see analyzeTropes.R
stargazer(modelsPlain[9:16],
          column.sep.width = "-20pt", no.space=T, align=T, df=F,
          omit=c("Topic", "country", "admin", "date"),
          omit.stat = c("ll", "rsq", "adj.rsq", "aic", "f", "ser"),
          covariate.labels = varLabels)


#### APPENDIX 4.2: Presidents and Time ####

#### Table A23: Results of regressions, including president fixed effects and cubic time spline ####

# Define variables and labels
controls1a = paste0(controls1, "+ factor(admin) + splines::bs(date, 3)")

theIV = "~ globalsouth + logYSI"
predControls = controls1
countControls = controls1a
varLabels = varLabels1
dataset="d"
showPreds = FALSE

# Run regressions and store results
source("./code/doRegressions.R")
modelsAdmin = outList

# Standard estimates (not shown in text)
# Note that the number of observations in each model appears in this code.
stargazer(modelsAdmin[5:8], 
          column.sep.width = "-20pt", no.space=T, align=T, df=F,
          omit=c("country", "date"),
          omit.stat = c("ll", "rsq", "adj.rsq", "aic", "f", "ser"),
          covariate.labels = c(varLabels, "Johnson", "Nixon", "Ford"))
# Estimates with clustered standard errors (shown in text)
# Note that the number of observations in each model do not appear in this code.
# Number of observations in each model are reported in the standard estimates above.
stargazer(modelsAdmin[13:16],
          column.sep.width = "0pt", no.space=T, align=T, df=F,
          omit=c("country", "date"),
          omit.stat = c("ll", "rsq", "adj.rsq", "aic", "f", "ser"),
          covariate.labels = c(varLabels, "Johnson", "Nixon", "Ford"))


#### APPENDIX 4.3: Entry Topics with a Structural Topic Model ####

#### Table A24: Topics and propensities for PDB entries, according to 65-topic STM ####
topicProps = topicData |> 
  summarize_if(is.numeric, summary) |> 
  round(3) |> t() |> as.data.frame()
names(topicProps) = c("Min.", "Q1", "Med.", "Mean", "Q3", "Max.")
topicProps$topic = topicLabels$newtopic
topicProps = topicProps |> dplyr::select(topic, everything())
topicProps


#### Table A25: Top ten words for each topic in terms of FREX ####
repWords = labelTopics(segstm, n=10) 
repWordsOut = data.frame(repWords$frex)
repWordsTable = data.frame(frex=apply(repWordsOut, 1, paste, collapse=", "))
xtable(repWordsTable)


#### Table A26: Accuracy in Random 4 Word Set Intrusion task for individual topics in 65-topic STM ####

gd = read_xlsx("./data/stmCoherenceValidation.xlsx")

gd2 = gd |> group_by(group) |> mutate(correct = ifelse(intruder==guess, 1, 0))
sum(gd2$correct, na.rm=T)/length(unique(gd$group)) # Overall accuracy: 0.938

# Create table of validation performance for each topic
intrudeProps = foreach (i = 1:65, .combine="rbind") %do% {
  isTopic = sum(gd2$topic==i)
  isTopicInd = gd2$group[which(gd2$topic==i)]
  
  # Get all entries that were about this topic
  topicEntries = gd2 |> filter(group %in% isTopicInd) |> filter(intruder==1)
  
  # How many were correctly identified?
  totalRight = sum(topicEntries$correct, na.rm=T)
  
  # Place into dataframe
  data.frame(topic=topicLabels$newtopic[i], total=nrow(topicEntries), correct=totalRight, prCorrect=totalRight/nrow(topicEntries))
}
print(xtable(intrudeProps, digits=3))


#### Table A27: Accuracy in Optimal Labor Task for individual topics in 65-topic STM ####

gt = read_xlsx("./data/stmLabelValidation.xlsx")
gt$guess = ifelse(gt$guess!="", 1, 0)

gt2 = gt |> mutate(correct = ifelse(rightLabel==guess, 1, 0))
sum(gt2$correct, na.rm=T)/length(unique(gt2$group)) # Overall accuracy: 0.894

# Create table of validation performance for each topic
validTopics = topicLabels$newtopic
validProps = foreach (i = 1:length(validTopics), .combine="rbind") %do% {
  totalReal = sum(gt$topic==validTopics[i])
  topicEntriesInd = gt$group[which(gt$topic==validTopics[i])]
  
  # Get all entries that were about this topic
  topicEntries = gt |> filter(group %in% topicEntriesInd)
  
  # How many were correctly identified?
  totalRight = sum(topicEntries$rightLabel==topicEntries$guess, na.rm=T)
  
  # Place into dataframe
  data.frame(topic=validTopics[i], total=totalReal, correct=totalRight, prCorrect=totalRight/totalReal)
}
print(xtable(validProps, digits=3))


#### Table A28: Results of regressions, including topics from STM ####

# Define variables and labels
keyTopics = paste("+", paste(paste0("Topic", c(1,2,3,4,5,6,8,9,
                                               11,13,14,17,
                                               20,21,22,24,26,28,
                                               38,
                                               41,43,44,45,46,47,
                                               50,52,53,54,55,59,
                                               60,61,62,64)), collapse=" + "))

controlsSTM = paste0(controls1, keyTopics)
controlsSTMa = paste0(controls1a, keyTopics)

theIV = "~ globalsouth + logYSI"
predControls = controlsSTM
countControls = controlsSTM
varLabels = varLabels1
dataset="d"
showPreds = FALSE

# Run regressions and store results
source("./code/doRegressions.R")
modelsSTM = outList

# Standard estimates (not shown in text)
# Note that the number of observations in each model appears in this code.
stargazer(modelsSTM[1:8],
          column.sep.width = "-20pt", no.space=T, align=T, df=F,
          omit=c("Topic", "country", "admin", "date"),
          omit.stat = c("ll", "rsq", "adj.rsq", "aic", "f", "ser"),
          covariate.labels = varLabels1)
# Estimates with clustered standard errors (shown in text)
# Note that the number of observations in each model do not appear in this code.
# Number of observations in each model are reported in the standard estimates above.
stargazer(modelsSTM[9:16],
          column.sep.width = "-20pt", no.space=T, align=T, df=F,
          omit=c("Topic", "country", "admin", "date"),
          omit.stat = c("ll", "rsq", "adj.rsq", "aic", "f", "ser"),
          covariate.labels = varLabels1)


#### Figure A4: Coefficient plots for topics in regressions from Table A28 ####

# Import hand-coded topic names
topicLabels$term = paste0("Topic", topicLabels$topic)
topicLabels = topicLabels %>% dplyr::select(term, newtopic)

# Tidied data from quasibinomial models with STM
coef_p_inf_s = tidy(modelsSTM$p_inf_c) %>% filter(grepl("Topic", term))
coef_p_anim_s = tidy(modelsSTM$p_anim_c) %>% filter(grepl("Topic", term))
coef_p_bel_s = tidy(modelsSTM$p_bel_c) %>% filter(grepl("Topic", term))
coef_p_irr_s = tidy(modelsSTM$p_irr_c) %>% filter(grepl("Topic", term))

# Tidied data from Poisson models with STM
coef_c_inf_s = tidy(modelsSTM$c_inf_c) %>% filter(grepl("Topic", term))
coef_c_anim_s = tidy(modelsSTM$c_anim_c) %>% filter(grepl("Topic", term))
coef_c_bel_s = tidy(modelsSTM$c_bel_c) %>% filter(grepl("Topic", term))
coef_c_irr_s = tidy(modelsSTM$c_irr_c) %>% filter(grepl("Topic", term))

# Put all together
topicCoefs = rbind(coef_p_inf_s, coef_p_anim_s, coef_p_bel_s, coef_p_irr_s,
                   coef_c_inf_s, coef_c_anim_s, coef_c_bel_s, coef_c_irr_s)
topicCoefs$model = rep(c("Probabilities + Topics\n(Quasibinomial)", "Counts + Topics\n(Poisson)"), each=35*4)
topicCoefs$model = factor(topicCoefs$model, levels=c("Counts + Topics\n(Poisson)", "Probabilities + Topics\n(Quasibinomial)"))
topicCoefs$trope = rep(rep(c("Infantilization", "Animal Analogies", "Belligerence", "Irrationality"), each=35), 2)
topicCoefs$trope = factor(topicCoefs$trope, levels=c("Infantilization", "Animal Analogies", "Belligerence", "Irrationality"))
topicCoefs$topicNumber = as.numeric(gsub("Topic", "", topicCoefs$term))
topicCoefs = topicCoefs %>% arrange(topicNumber)

topicCoefs = merge(topicCoefs, topicLabels)
topicCoefs$newtopicN = paste0(topicCoefs$topicNumber, ": ", topicCoefs$newtopic)
topicCoefs$newtopicN = factor(topicCoefs$newtopicN, levels=unique(topicCoefs$newtopicN[order(topicCoefs$topicNumber)]), ordered=TRUE)
topicCoefs$signif = ifelse(topicCoefs$p.value < 0.05, "Yes", "No")

ggplot(topicCoefs, aes(fct_rev(newtopicN), estimate)) + geom_hline(yintercept = 0, linetype=2, color="gray40", linewidth=0.3) + 
  geom_pointrange(aes(color=model, y=estimate, ymin=estimate-1.96*std.error,
                      ymax=estimate+1.96*std.error, shape=signif), position = position_dodge(0.8), size=0.3) + 
  coord_flip(ylim=c(-40, 15)) + theme_bw() +
  scale_color_manual("Model", guide=guide_legend(reverse=TRUE), values=c("gray70", "gray50")) + 
  scale_shape_manual("95% Significant", values=c(16, 15)) + 
  ylab("Estimate") + xlab("") +
  theme(legend.position = "bottom") + facet_grid(~trope) + guides(shape="none")

ggsave("./figures/coefPlotsTopics.pdf", width=8.75, height=8)


#### APPENDIX 4.4: Including Leader Tenure ####

#### Table A29: Results of regressions, including leader tenure ####

# Define variables and labels
controls3 = paste0("+ conf_high + democracy + personal + 
                   leaderMention + log(leaderTenure+1) + 
                   UStrade + USmilaid + USdefense + logWords + factor(country)", keyTopics)

varLabels3 = c("Global South", "Years since independence", "Conflict", "Democracy", "Personalism", 
               "Leader mention", "Leader tenure", "US trade", "US military aid", "US defense", "Entry length")

theIV = "~ globalsouth + logYSI"
predControls = controls3
countControls = controls3
varLabels = varLabels3
dataset="d"
showCoefs = TRUE

# Run regressions and store results 
source("./code/doRegressions.R")
modelsTenure = outList

# Standard estimates
# Note that the number of observations in each model appears in this code.
stargazer(modelsTenure[1:8],
          column.sep.width = "-20pt", no.space=T, align=T, df=F,
          omit=c("Topic", "country", "admin", "date"),
          omit.stat = c("ll", "rsq", "adj.rsq", "aic", "f", "ser"),
          covariate.labels = varLabels3)
# Estimates with clustered standard errors
# Note that the number of observations in each model do not appear in this code.
# Number of observations in each model are reported in the standard estimates above.
stargazer(modelsTenure[9:16],
          column.sep.width = "-20pt", no.space=T, align=T, df=F,
          omit=c("Topic", "country", "admin", "date"),
          omit.stat = c("ll", "rsq", "adj.rsq", "aic", "f", "ser"),
          covariate.labels = varLabels3)


#### APPENDIX 4.5: Only Decolonized States ####

#### Table A30: Results of regressions, limited only to decolonized states ####

# Filter data to exclude entries where a country is not decolonized
d3 = d %>% filter(!(logYSI==0 & decol==0))

# Define variables and labels
varLabels4 = c("Years since independence", "Conflict", "Democracy", "Personalism", 
               "Leader mention", "US trade", "US military aid", "US defense", "Entry length")

theIV = "~ logYSI"
predControls = controlsSTM
countControls = controlsSTM
varLabels = varLabels4
dataset="d3"
showCoefs = TRUE

# Run regressions and store results
source("./code/doRegressions.R")
modelsDecol = outList

# Standard estimates
# Note that the number of observations in each model appears in this code.
stargazer(modelsDecol[1:8],
          column.sep.width = "-20pt", no.space=T, align=T, df=F,
          omit=c("Topic", "country", "admin", "date"),
          omit.stat = c("ll", "rsq", "adj.rsq", "aic", "f", "ser"),
          covariate.labels = varLabels4)
# Estimates with clustered standard errors
# Note that the number of observations in each model do not appear in this code.
# Number of observations in each model are reported in the standard estimates above.
stargazer(modelsDecol[9:16],
          column.sep.width = "-20pt", no.space=T, align=T, df=F,
          omit=c("Topic", "country", "admin", "date"),
          omit.stat = c("ll", "rsq", "adj.rsq", "aic", "f", "ser"),
          covariate.labels = varLabels4)


#### APPENDIX 4.6: More on regional variation ####

#### Table A31: Results of regional regressions ####

# Define variables and labels
varLabels2 = c("Americas",
               "Eastern Europe", 
               "Middle East/Northern Africa", 
               "Sub-Saharan Africa", 
               "Asia",  
               "Oceania",
               "Years since independence", "Conflict", "Democracy", "Personalism", 
               "Leader mention", "US trade", "US military aid", "US defense", "Entry length")

theIV = "~ geo_region + logYSI"
predControls = controlsSTM
countControls = controlsSTM
varLabels = varLabels2
dataset="d"
showPreds = FALSE

# Run regressions and store results
source("./code/doRegressions.R")
modelsRegion = outList

# Standard estimates (not shown in text)
# Note that the number of observations in each model appears in this code.
stargazer(modelsRegion[1:8],
          column.sep.width = "-20pt", no.space=T, align=T, df=F,
          omit=c("Topic", "country", "admin", "date"),
          omit.stat = c("ll", "rsq", "adj.rsq", "aic", "f", "ser"),
          covariate.labels = varLabels2)
# Estimates with clustered standard errors (shown in text)
# Note that the number of observations in each model do not appear in this code.
# Number of observations in each model are reported in the standard estimates above.
stargazer(modelsRegion[9:16],
          column.sep.width = "-20pt", no.space=T, align=T, df=F,
          omit=c("Topic", "country", "admin", "date"),
          omit.stat = c("ll", "rsq", "adj.rsq", "aic", "f", "ser"),
          covariate.labels = varLabels2)


#### Table A32: Results of regional regressions, separating three countries from their regions ####

# Define variables and labels
varLabels2s = c("Americas",
                "Eastern Europe", 
                "Middle East/Northern Africa", 
                "Sub-Saharan Africa", 
                "Asia",  
                "Oceania",
                "Vietnam", "USSR", "South Africa",
                "Years since independence", "Conflict", "Democracy", "Personalism", 
                "Leader mention", "US trade", "US military aid", "US defense", "Entry length")

theIV = "~ geo_region2 + logYSI"
predControls = controlsSTM
countControls = controlsSTM
varLabels = varLabels2s
dataset="d"
showCoefs = TRUE

# Run regressions and store results
source("./code/doRegressions.R")
modelsRegionSplit = outList

# Standard estimates (not shown in text)
# Note that the number of observations in each model appears in this code.
stargazer(modelsRegionSplit[1:8],
          column.sep.width = "-20pt", no.space=T, align=T, df=F,
          omit=c("Topic", "country", "admin", "date"),
          omit.stat = c("ll", "rsq", "adj.rsq", "aic", "f", "ser"),
          covariate.labels = varLabels2s)
# Estimates with clustered standard errors (shown in text)
# Note that the number of observations in each model do not appear in this code.
# Number of observations in each model are reported in the standard estimates above.
stargazer(modelsRegionSplit[9:16],
          column.sep.width = "-20pt", no.space=T, align=T, df=F,
          omit=c("Topic", "country", "admin", "date"),
          omit.stat = c("ll", "rsq", "adj.rsq", "aic", "f", "ser"),
          covariate.labels = varLabels2s)


#### APPENDIX 4.7: More on Temporal Variation ####

#### Figure A5: Coefficient estimates for the years since independence variable ####

### Perform rolling regressions 
# (Note: The for-loop below is the same code as in analyzeTropes.R) 

# Define range of time
yrange = 6

# Years to consider
years = 1961:(1977-yrange)

# Do rolling regressions
timeListGS = timeListReg = regCountList = list()
for (k in 1:length(years)) {
  
  # Start and end years
  startyear = years[k]
  endyear = years[k] + yrange
  
  # Get the data from those years
  dds = d |> filter(year >= startyear & year <= endyear)
  
  # Main models
  theIV = "~ globalsouth + logYSI"
  predControls = controlsSTM
  countControls = controlsSTM
  varLabels = varLabels1
  dataset="dds"
  showCoefs = FALSE
  
  # Run models
  source("./code/doRegressions.R")
  modelsGS = outList
  
  # Gather results
  mp_inf = tidy(modelsGS[[9]]) |> mutate(model="Probabilities", trope="Infantilization")
  mp_anim = tidy(modelsGS[[10]]) |> mutate(model="Probabilities", trope="Animal analogy")
  mp_bel = tidy(modelsGS[[11]]) |> mutate(model="Probabilities", trope="Belligerence")
  mp_irr = tidy(modelsGS[[12]]) |> mutate(model="Probabilities", trope="Irrationality")
  
  mc_inf = tidy(modelsGS[[13]]) |> mutate(model="Counts", trope="Infantilization")
  mc_anim = tidy(modelsGS[[14]]) |> mutate(model="Counts", trope="Animal analogy")
  mc_bel = tidy(modelsGS[[15]]) |> mutate(model="Counts", trope="Belligerence")
  mc_irr = tidy(modelsGS[[16]]) |> mutate(model="Counts", trope="Irrationality")
  
  mall = bind_rows(mp_inf, mp_anim, mp_bel, mp_irr,
                   mc_inf, mc_anim, mc_bel, mc_irr)
  
  mall = mall |> filter(grepl("globalsouth|logYSI", term))
  mall = mall |> relabel_predictors(c(globalsouth="Global South",
                                      logYSI="Years since\nindependence"))
  mall$signif = ifelse(mall$p.value < 0.05, "Yes", "No")
  
  # Start and end years, as well as total number of entries in the time window
  mall = mall |> mutate(start=startyear, end=endyear, windowEntries = nrow(dds))
  
  # Store results
  timeListGS[[k]] = mall
  
  ## Next, models using regions 
  theIV = "~ geo_region + logYSI"
  predControls = controlsSTM
  countControls = controlsSTM
  varLabels = varLabels2
  dataset="dds"
  showCoefs = FALSE
  
  # Run models
  source("./code/doRegressions.R")
  modelsReg = outList
  
  # Gather results
  mp_inf = tidy(modelsReg[[9]]) |> mutate(model="Probabilities", trope="Infantilization")
  mp_anim = tidy(modelsReg[[10]]) |> mutate(model="Probabilities", trope="Animal analogy")
  mp_bel = tidy(modelsReg[[11]]) |> mutate(model="Probabilities", trope="Belligerence")
  mp_irr = tidy(modelsReg[[12]]) |> mutate(model="Probabilities", trope="Irrationality")
  
  mc_inf = tidy(modelsReg[[13]]) |> mutate(model="Counts", trope="Infantilization")
  mc_anim = tidy(modelsReg[[14]]) |> mutate(model="Counts", trope="Animal analogy")
  mc_bel = tidy(modelsReg[[15]]) |> mutate(model="Counts", trope="Belligerence")
  mc_irr = tidy(modelsReg[[16]]) |> mutate(model="Counts", trope="Irrationality")
  
  mall = bind_rows(mp_inf, mp_anim, mp_bel, mp_irr,
                   mc_inf, mc_anim, mc_bel, mc_irr)
  
  mall = mall |> filter(grepl("region", term))
  
  mall = mall |> relabel_predictors(c(geo_regionAmericas="Americas",
                                      geo_regionAsia="Asia",
                                      geo_regionOceania="Oceania",
                                      'geo_regionEastern Europe'="Eastern Europe",
                                      'geo_regionMiddle East/Northern Africa' = "Middle East/Northern Africa",
                                      'geo_regionSub-Saharan Africa'="Sub-Saharan Africa",                                             
                                      logYSI="Years since\nindependence",
                                      conf_high="Conflict",
                                      democracy="Democracy",
                                      personal="Personalism",
                                      leaderMention="Leader mention",
                                      UStrade="US trade",
                                      USmilaid="US military aid",
                                      USdefense="US defense",
                                      logWords="Entry length"))
  mall$signif = ifelse(mall$p.value < 0.05, "Yes", "No")
  
  mall$term = factor(mall$term, 
                     levels=c("Americas",
                              "Eastern Europe", 
                              "Middle East/Northern Africa", 
                              "Sub-Saharan Africa", 
                              "Asia",  
                              "Oceania",
                              "Years since\nindependence", "Conflict", "Democracy",
                              "Personalism", "Leader mention", "US trade",
                              "US military aid", "US defense", "Entry length"))
  
  # Start and end years, as well as total number of entries in the time window
  mall = mall |> mutate(start=startyear, end=endyear, windowEntries = nrow(dds))
  
  # Add in counts of entries for each region
  regCounts = data.frame(table(dds$geo_region))
  mall = merge(mall, regCounts, by.x="term", by.y="Var1", all.x=T)
  
  # Store results
  timeListReg[[k]] = mall
  
  # Message indicating what year has been completed
  message(years[k])
}

# Put data together
timedataGS = rbindlist(timeListGS)
timedataGS$trope = factor(timedataGS$trope, levels=c("Infantilization", "Animal analogy", "Belligerence", "Irrationality"))
timedataReg = rbindlist(timeListReg)
timedataReg$trope = factor(timedataReg$trope, levels=c("Infantilization", "Animal analogy", "Belligerence", "Irrationality"))

# Make plot
timedataGS |> 
  filter(term=="Years since\nindependence") |>
  ggplot(aes(start+yrange/2, estimate)) + 
  geom_hline(yintercept=0, linetype=2) +
  geom_line(aes(color=fct_rev(model))) +
  geom_linerange(aes(ymin=estimate-1.96*std.error, ymax=estimate+1.96*std.error, color=fct_rev(model))) +
  facet_wrap(~trope, scales = "free_y") +
  ylab("Estimate") +
  theme_bw() + scale_x_continuous("Year", 
                                  breaks=c(1963, 1967, 1971, 1975), 
                                  minor_breaks = 1961:1977) +
  scale_color_manual("Model", values=c("red3", "steelblue"))
ggsave("./figures/overTimeYSI.pdf", width=7, height=4.5)